###############################
# Polity survival: Load data and create main analysis variable
#
#
# Replication Material for:
# Continuity or Change? (In)direct Rule in British and French Colonial Africa
# 
# Carl Mueller-Crepon, 2020
# International Organization
#
# File Description:
# Data preparation
#
# Called from scripts/polities/analysis.R 
#
##################################


# Load Data

#' Loads the following items (restored by load() into the environment
#' from which it is called):
#'   polity.yrs.df      Polities x year
#'   ruler.yrs.df       Rulers x year
#'   murdock.pol        Cross-section of Murdock groups; Polities aggregated to the Murdock group level
#'   pol.rast.df        Cross-section of raster cells with polity-information.
#'   murdock.cow.spdf   Murdock spatial data intersected with colony-borders
#'
#' The path is relative, so the working directory must be the folder that
#' contains data/ when this script is run.

load("data/data_polities.RData")



# Encode variables needed for analysis

# ... ever colonized?
# A polity is flagged on all of its rows as soon as one of its polity-years
# has a positive row sum across the colonizer columns col.brit, col.frnc,
# col.ger, col.port and col.belg (NAs are ignored in the sum).
polity.yrs.df$ever.colonized <- local({
  colonizer.cols <- paste0("col.", c("brit", "frnc", "ger", "port", "belg"))
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  colonized.rows <- rowSums(polity.yrs.df[, colonizer.cols], na.rm = TRUE) > 0
  # %in% already ignores duplicates, so no unique() is needed here.
  polity.yrs.df$polity.id %in% polity.yrs.df$polity.id[colonized.rows]
})

# ... ever colonized & existed prior to colonization?
# Flags every row of a polity that has at least one row which is both
# ever.colonized and carries one of the action types listed below.
polity.yrs.df$ever.precolonial <- local({
  precol.actions <- c("precolonization", "conquest", "independence")
  qualifying.rows <- polity.yrs.df$ever.colonized &
    polity.yrs.df$action.type %in% precol.actions
  qualifying.ids <- unique(polity.yrs.df$polity.id[qualifying.rows])
  polity.yrs.df$polity.id %in% qualifying.ids
})


# ... Death of polity: 1 in the polity-year that equals the polity's last
# observed year (end.max), provided that this last year is not 2006 (end of
# encyclopedia) and prec.end.miss is 0; 0 in all other polity-years.
# NOTE(review): the code tests end.max != 2006 rather than year < 2006.
# NOTE(review): prec.end.miss presumably marks an imprecise or missing end
# date -- confirm against the codebook.
polity.yrs.df$polity.death <- local({
  is.final.year <- polity.yrs.df$year == polity.yrs.df$end.max
  ends.off.cutoff <- polity.yrs.df$end.max != 2006
  end.miss.zero <- polity.yrs.df$prec.end.miss == 0
  ifelse(is.final.year & ends.off.cutoff & end.miss.zero, 1, 0)
})

# ... polity time (for survival model): years elapsed since the polity's
# first observed year (start.min)
polity.yrs.df[["time"]] <- polity.yrs.df$year - polity.yrs.df$start.min

# ... counting-process interval over the entire life-cycle: each polity-year
# covers (start, stop] = (time - 1, time]
polity.yrs.df[["start"]] <- polity.yrs.df[["time"]] - 1
polity.yrs.df[["stop"]] <- polity.yrs.df[["time"]]


# Polity age: years since the polity's first observed year (start.min),
# counting that first year as age 1.
polity.yrs.df$polity.age <- polity.yrs.df$year - polity.yrs.df$start.min + 1

# Colonizer based periods 
# These periods are those used in the main analysis.
#
# For every polity that is flagged ever.precolonial and ever.colonized, the
# variables below are computed per polity-year and merged back onto
# polity.yrs.df by (polity.id, year). Rows without a match get NA.
#   yrs.2.col     years until the next "switch year" (first colonized year,
#                 a year in which the colonizer changes, or the year after
#                 the last colonized year); once no switch remains, years
#                 until the polity's last observed year, where it is 0
#   start.col     years since the start of the current colonial spell
#   stop.col      start.col + 1
#   min.col.year  first year of the current colonial spell
# start.col, stop.col and min.col.year are only set in colonized years.
#
# The arithmetic assumes that each polity's rows are sorted by year and
# that its observed years and its colonized years are consecutive.
# NOTE(review): join() is not defined in this file; presumably plyr::join()
# loaded by the calling script -- confirm.
polity.yrs.df <- join(
  polity.yrs.df,
  data.frame(do.call(
    rbind,
    lapply(unique(polity.yrs.df$polity.id), function(p) {
      # Row mask of this polity, computed once and reused below
      is.p <- polity.yrs.df$polity.id == p

      # Skip polities that are not both precolonial and colonized
      if (!any(is.p & polity.yrs.df$ever.precolonial &
               polity.yrs.df$ever.colonized)) {
        return(NULL)
      }

      # Colonized years and the colonizer in each of them
      is.col <- is.p & polity.yrs.df$colonized == 1
      col.years <- polity.yrs.df$year[is.col]
      colonizer <- polity.yrs.df$colonizer[is.col]
      n.col <- length(col.years)
      if (n.col == 0) {
        # Fail with a clear message instead of an opaque subscript error
        stop("Polity ", p, " is flagged ever.colonized but has no year ",
             "with colonized == 1.", call. = FALSE)
      }

      # Years of switching: first colonized year, every colonized year whose
      # colonizer differs from the previous one, and the year after the last
      # colonized year. Negative indexing is safe for a single year.
      changes <- which(colonizer[-1] != colonizer[-n.col])
      switch.years <- c(col.years[c(1, 1 + changes)], max(col.years) + 1)

      # Spells of colonization: running year count within each spell
      spell.len <- diff(switch.years)
      spells <- sequence(spell.len)

      # Year of colonization of this spell
      min.col.year <- rep(switch.years[-length(switch.years)],
                          times = spell.len)

      # Years towards colonization in next spell
      p.yrs <- polity.yrs.df$year[is.p]
      switch.years2 <- c(min(p.yrs),
                         switch.years[switch.years < max(p.yrs)],
                         max(p.yrs))
      # rev(seq_len(d)) counts d, ..., 1 and is empty for d == 0 (a switch
      # in the polity's first year); `d:1` would yield c(0, 1) there.
      spell2col <- c(unlist(lapply(diff(switch.years2),
                                   function(d) rev(seq_len(d)))),
                     0)

      # Merge and return
      join(data.frame(polity.id = p,
                      yrs.2.col = spell2col,
                      year = p.yrs),
           data.frame(polity.id = p,
                      start.col = spells - 1,
                      stop.col = spells,
                      year = col.years,
                      min.col.year = min.col.year),
           type = "left", by = c("polity.id", "year"))
    })
  )),
  # Keys are stated explicitly, as in the inner join above, instead of
  # relying on whichever column names the two tables happen to share.
  type = "left", by = c("polity.id", "year")
)



